package com.outsider.util.pageprocessor;

import java.text.SimpleDateFormat;
import java.util.Date;

import org.eclipse.jetty.util.UrlEncoded;

import com.outsider.util.Const;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * WebMagic {@link PageProcessor} for a Dangdang search result page.
 *
 * <p>For a page that contains an element with class {@code line1}, it extracts
 * the first matching product's name, image address, price and link and stores
 * them on the page under the field names {@code name}, {@code image},
 * {@code price} and {@code url}.
 */
public class DD_SearchPageprocessor implements PageProcessor {
	/** Crawl settings: user agent from {@link Const#USER_AGENT}, sleep time 1, up to 3 retries. */
	private final Site site = Site.me().setUserAgent(Const.USER_AGENT).setSleepTime(1).setRetryTimes(3);

	/**
	 * Extracts the product fields from {@code page} and stores them as result fields.
	 *
	 * <p>Returns without storing anything when the page has no {@code line1} element.
	 * Any individual field whose selector does not match is stored as {@code null}.
	 *
	 * @param page the downloaded page to extract from
	 */
	@Override
	public void process(Page page) {
		// No result row on the page: nothing to extract.
		if (page.getHtml().xpath("//*[@class=\"line1\"]").toString() == null) {
			return;
		}

		String name = page.getHtml().xpath("//*[@name=\"title\" and @class=\"name\"]/a").regex("title=\"(.*?)\"").toString();
		String image = page.getHtml().xpath("//*[@class=\"line1\"]/a/img").regex("src=\"(.*?)\"").toString();
		// NOTE(review): the "/[@class=\"price\"]" step has no node test and looks malformed;
		// left unchanged because the selector engine's handling of it cannot be confirmed here.
		String rawPrice = page.getHtml().xpath("//*[@class=\"line1\"]/[@class=\"price\"]/span/text()").toString();
		// The selector yields null when no price node matches, so the first character
		// must not be stripped unconditionally.
		String price = dropFirstCharacter(rawPrice);
		String url = page.getHtml().xpath("//*[@class=\"line1\"]/a").regex("href=\"(.*?)\"").toString();

		page.putField("name", name);
		page.putField("price", price);
		page.putField("image", image);
		page.putField("url", url);
		System.out.println("DD:" + new SimpleDateFormat(Const.DATE_FORMAT).format(new Date()));
	}

	/**
	 * Removes the first character of {@code text}.
	 *
	 * <p>Presumably the dropped character is the currency symbol in front of the
	 * amount; verify against the live page markup.
	 *
	 * @param text the extracted price text, possibly {@code null} or empty
	 * @return {@code text} without its first character, or {@code text} itself
	 *         when it is {@code null} or empty
	 */
	private static String dropFirstCharacter(String text) {
		if (text == null || text.isEmpty()) {
			return text;
		}
		return text.substring(1);
	}

	/**
	 * Returns the crawl settings used by this processor.
	 *
	 * @return the configured {@link Site}
	 */
	@Override
	public Site getSite() {
		return site;
	}

	/**
	 * Manual entry point: runs this processor against a fixed Dangdang search URL
	 * using {@link Spider}'s test mode.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		Spider.create(new DD_SearchPageprocessor()).test("http://search.dangdang.com/?key=从你的全世界路过&show=big#J_tab");
	}
}
